***************
* This file prepares the oste-occupation dataset
* Author: Daniel Kopp
***************

* Load the raw OSTE occupation table (CSV) into memory, replacing any data currently loaded.
import delimited using "data_raw\20220329_SECO2KOF_Tab1_OSTE_Beruf.csv" , clear

* Bring the ISCO text column in line with the cod_<stem> / txt_<stem> naming pattern used below.
rename txt_isco5_de txt_isco5

* For each coded attribute: store a code -> text lookup file, then drop the
* text column from the main data so that only the code remains.
local stems b_avam isco5 erfahrung ausbildung
foreach stem of local stems {
	* Report the number of distinct codes (distinct is not built in; it must be installed, e.g. from SSC).
	distinct cod_`stem'

	* Build the lookup on a temporary copy of the data: one row per code.
	preserve
		duplicates drop cod_`stem', force
		keep cod_`stem' txt_`stem'
		save "Help_files\label_`stem'.dta", replace
	restore

	drop txt_`stem'
}

* Recode the Y/N string flags into labelled 0/1 indicators:
*   "Y" -> 1, "N" -> 0; "-" and any other value become missing (.).
la def yesno 0 "no" 1 "yes", modify
foreach v of varlist abschluss_inland abschluss_ausland {
	* Use tempvars instead of a hard-coded scratch name (tmp), so the recode
	* cannot collide with a variable that already exists in the data.
	tempvar raw flag
	gen `raw' = ustrtrim(`v')
	gen `flag' = cond(`raw' == "Y", 1, cond(`raw' == "N", 0, .))

	* Make values outside Y/N/- visible instead of silently turning them into missing.
	quietly count if !inlist(`raw', "Y", "N", "-")
	if r(N) > 0 {
		display as text "note: `v' has " r(N) " value(s) other than Y/N/-; recoded to missing"
	}

	* Replace the string variable by the numeric indicator under the original name.
	drop `v' `raw'
	rename `flag' `v'
	la val `v' yesno
}

distinct oste_id_avam	// 563'638

* Note that the order of the occupations within oste_id_avam does not have any significance.
* To be able to reproduce the results, we generate a random but fixed order within oste_id_avam.
* The only condition: occupations where cod_erfahrung and/or cod_ausbildung are
* missing (coded -1) appear at the end of the list.

* Indicators that push rows with a missing (-1) code to the end of each group.
gen byte cod_erfa_missing = (cod_erfahrung == -1)
gen byte cod_ausb_missing = (cod_ausbildung == -1)

* The draws depend on the seed and on the row order at this point (i.e. the order
* in which the rows were imported). Stored as double so that rounding to float
* cannot create ties between rows of the same group.
* NOTE(review): the random-number stream may also depend on the Stata version in
* use; consider pinning it with a version statement at the top of the do-file.
set seed 10000
gen double random = runiform()

* random is part of the sort key, so the numbering is fully determined by the
* key itself and does not rely on an earlier sort being left untouched.
bys oste_id_avam (cod_erfa_missing cod_ausb_missing random): gen n_occup = _n

drop cod_erfa_missing cod_ausb_missing random

* Keep the saved file sorted by oste_id_avam and n_occup (row order is unchanged).
sort oste_id_avam n_occup

save "data_processed\oste_occupations.dta", replace



